package com.larry.mapreduce.etl;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that filters raw web access-log lines (data cleaning step).
 *
 * <p>Every input line is split on the single-space delimiter. A line is kept only when
 * that split produces more than 11 fields; kept lines are written out unchanged as the
 * output key, paired with a {@link NullWritable} value. Lines that fail the check are
 * dropped without any further reporting.
 *
 * <p>Example of a line that is dropped (it splits into only 10 fields):
 * <pre>
 * 58.215.204.118 - - [18/Sep/2013:06:51:41 +0000] "-" 400 0 "-" "-"
 * </pre>
 *
 * @ProjectName MapReduceDemo
 * @Author larry
 * @Date 21:26 2021/11/9
 * @Description Data cleaning
 **/
public class WebLogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the incoming record unchanged when it passes {@link #parseLog}; otherwise
     * emits nothing for that record.
     *
     * @param key     input key supplied by the framework; not read by this method
     * @param value   one line of log text
     * @param context task context used to write the surviving record
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Only valid lines reach the output; invalid ones fall through silently.
        if (parseLog(value.toString(), context)) {
            // The original Text object is written as-is, so the output line is identical to the input.
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Validity check for a single log line.
     *
     * @param line    the log line as a string
     * @param context task context; currently unused by this check
     * @return {@code true} when splitting {@code line} on {@code " "} yields more than
     *         11 fields, {@code false} otherwise
     */
    private boolean parseLog(String line, Context context) {
        final String[] fields = line.split(" ");
        return fields.length > 11;
    }
}
